**********************************************************************************************
* Reproduces Table 7
*
* Ajzenman, Elacqua, Jaimovich and Pérez-Núñez (2024)."Humans versus Chatbots: Scaling-up behavioral interventions to reduce teacher shortages"  	  		
*
**********************************************************************************************


*Note: This table depends on intermediate results from the causal forest analysis. Run the R scripts in the 'Causal Forest' folder first to replicate that analysis; the intermediate results they produce are stored in that folder and are required as input for the code below.

/// PANEL A : Education as first option 

*import causal forest sample data. 
clear
import excel "$cfpath\sample_cf_human.xlsx", sheet("Sheet 1") firstrow

destring sexo, replace 

mat D=J(12,4,.)
mat F=J(12,4,.)

local i= 1

*Sample mean in covariates
foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{
sum `var' 
mat D[`i',1]= r(mean)
mat F[`i',1]= r(sd)

local i =`i'+1

}

matlist D 

*import causal forest results. 
clear 
import excel "$cfpath\education_rankfirst.xlsx", sheet("Sheet 1") firstrow
drop scaling labels variation
keep if ranking=="Q1" | ranking=="Q5"


*put the average of Q1 and Q5 in the matrix, and the std. difference
local i= 1

foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{

sum avg if covariate=="`var'" & ranking=="Q1"
mat D[`i',2]= r(mean)

sum avg if covariate=="`var'" & ranking=="Q5"
mat D[`i',3]= r(mean)

mat D[`i',4]= (D[`i',2] - D[`i',3])/(F[`i',1])

local i =`i'+1

}

matlist D

mat rownames D= "Female" "Public high school" "Subsidized high school" "Private high school" "High school ranking score"  "Recent year of"  "High school GPA"  "Average math and verbal"  "Parents without high education"  "Parents with high education"  "Parents with higher education"  "Low-income Family"  

mat colnames D= "Mean" "Bottom 20" "Top 20" "Mean diff."

frmttable using "$outputpath\Table7a.tex", statmat(D) varlabels replace sdec(3) tex fr


/// PANEL B : Proportion of the choice set

*import causal forest sample data. 
clear
import excel "$cfpath\sample_cf_human.xlsx", sheet("Sheet 1") firstrow

destring sexo, replace 

mat D=J(12,4,.)
mat F=J(12,4,.)

local i= 1

*Sample mean in covariates
foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{
sum `var' 
mat D[`i',1]= r(mean)
mat F[`i',1]= r(sd)

local i =`i'+1

}

matlist D 

*import causal forest results. 
clear 
import excel "$cfpath\education_proportion.xlsx", sheet("Sheet 1") firstrow
drop scaling labels variation
keep if ranking=="Q1" | ranking=="Q5"


*put the average of Q1 and Q5 in the matrix, and the std. difference
local i= 1

foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{

sum avg if covariate=="`var'" & ranking=="Q1"
mat D[`i',2]= r(mean)

sum avg if covariate=="`var'" & ranking=="Q5"
mat D[`i',3]= r(mean)

mat D[`i',4]= (D[`i',2] - D[`i',3])/(F[`i',1])

local i =`i'+1

}

matlist D

mat rownames D= "Female" "Public high school" "Subsidized high school" "Private high school" "High school ranking score"  "Recent year of"  "High school GPA"  "Average math and verbal"  "Parents without high education"  "Parents with high education"  "Parents with higher education"  "Low-income Family"  

mat colnames D= "Mean" "Bottom 20" "Top 20" "Mean diff."

frmttable using "$outputpath\Table7b.tex", statmat(D) varlabels replace sdec(3) tex fr


/// PANEL C : Application at least once 

*import causal forest sample data. 
clear
import excel "$cfpath\sample_cf_human.xlsx", sheet("Sheet 1") firstrow

destring sexo, replace 

mat D=J(12,4,.)
mat F=J(12,4,.)

local i= 1

*Sample mean in covariates
foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{
sum `var' 
mat D[`i',1]= r(mean)
mat F[`i',1]= r(sd)

local i =`i'+1

}

matlist D 

*import causal forest results. 
clear 
import excel "$cfpath\education_listed.xlsx", sheet("Sheet 1") firstrow
drop scaling labels variation
keep if ranking=="Q1" | ranking=="Q5"


*put the average of Q1 and Q5 in the matrix, and the std. difference
local i= 1

foreach var in sexo municipal subvencionado particular ptje_ranking agno_egreso prom_notas prom_cm_actual padres_incompleta padres_media_completa padres_superior bajo_linea_pobreza{

sum avg if covariate=="`var'" & ranking=="Q1"
mat D[`i',2]= r(mean)

sum avg if covariate=="`var'" & ranking=="Q5"
mat D[`i',3]= r(mean)

mat D[`i',4]= (D[`i',2] - D[`i',3])/(F[`i',1])

local i =`i'+1

}

matlist D

mat rownames D= "Female" "Public high school" "Subsidized high school" "Private high school" "High school ranking score"  "Recent year of"  "High school GPA"  "Average math and verbal"  "Parents without high education"  "Parents with high education"  "Parents with higher education"  "Low-income Family"  

mat colnames D= "Mean" "Bottom 20" "Top 20" "Mean diff."

frmttable using "$outputpath\Table7c.tex", statmat(D) varlabels replace sdec(3) tex fr